FROM sequenceiq/hadoop-docker:2.7.1
# MAINTAINER is deprecated; LABEL is the supported replacement.
LABEL maintainer="liangjie"
ENV SPARK_VERSION=2.2.1
# Spark binary build compiled against Hadoop 2.7.
ENV SPARK_HADOOP_PROFILE=2.7
# Use bash with pipefail so a failed download aborts the build instead of
# tar silently extracting a truncated/garbage stream.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# NOTE: the Tsinghua mirror only keeps recent releases; if this version is
# removed from the mirror, download the tarball from the official Apache
# archive (https://archive.apache.org/dist/spark/) instead.
# -f makes curl fail on HTTP errors rather than piping an error page to tar.
RUN curl -fsSL https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$SPARK_HADOOP_PROFILE.tgz | tar -xz -C /usr/local/
# Version-independent path to the Spark installation.
RUN ln -s /usr/local/spark-$SPARK_VERSION-bin-hadoop$SPARK_HADOOP_PROFILE /usr/local/spark
ENV SPARK_HOME=/usr/local/spark
# Hadoop client configuration used when submitting to a remote YARN cluster.
# COPY creates the target directory itself, so no separate mkdir is needed.
COPY yarn-remote-client $SPARK_HOME/yarn-remote-client

# Start HDFS via the base image's bootstrap, leave safe mode, and publish the
# Spark example jars to /spark in HDFS so YARN executors can fetch them.
# $SPARK_HOME/examples/jars resolves through the /usr/local/spark symlink —
# clearer than the previous trick of rebuilding the versioned directory name
# by appending suffixes to $SPARK_HOME. `hdfs dfsadmin` replaces the
# deprecated `hadoop dfsadmin` entry point.
RUN $BOOTSTRAP && $HADOOP_PREFIX/bin/hdfs dfsadmin -safemode leave && $HADOOP_PREFIX/bin/hdfs dfs -put $SPARK_HOME/examples/jars /spark

# Point Spark at the Hadoop client configuration for YARN submissions and put
# the Spark/Hadoop CLIs on PATH.
ENV YARN_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
ENV PATH=$PATH:$SPARK_HOME/bin:$HADOOP_PREFIX/bin
# Replace the base image's boot script with ours. Owner fixup and permission
# tightening are done in a single layer — a chown and chmod in separate RUNs
# would each duplicate the file's bytes into a new layer.
COPY bootstrap.sh /etc/bootstrap.sh
RUN chown root:root /etc/bootstrap.sh && chmod 700 /etc/bootstrap.sh

# Install R (from EPEL) and OpenJDK 8 in one layer, cleaning the yum cache in
# the same layer so it does not bloat the image.
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
RUN yum -y install R java-1.8.0-openjdk java-1.8.0-openjdk-devel \
    && yum clean all
# Re-point /usr/java/latest at the yum-installed JDK 8 and drop the bundled
# JDK 7. The exact OpenJDK directory name depends on the package version yum
# resolved — run a container and check under /usr/lib/jvm, then update the
# path here if the package is bumped.
# NOTE(review): the original (Chinese) comment warned that the symlink must
# exist before the old JDK is deleted or /usr/java/default dangles, yet the
# command removes `latest` first — confirm the intended ordering if this
# step is ever modified.
RUN rm -f /usr/java/latest \
    && rm -rf /usr/java/jdk1.7.0_71 \
    && ln -s /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.161-3.b14.el6_9.x86_64 /usr/java/latest
# Exec-form entrypoint: bootstrap.sh runs as PID 1 and receives signals.
ENTRYPOINT ["/etc/bootstrap.sh","-d"]
